

/* Find a group of 5 comparators per applicant for debt relief, matched on age and sex (alive in the year prior to application) */



libname raw 'G:\Data\Rawdata\707677';

libname common 'G:\Data\Workdata\707677\Common\01Data\01SAS';





* Loop over years and characteristics;

* What to do with persons for whom we do not have characteristics (gender and age);


* Load national data;


data list(keep=koen aar alder pnr debt_relief_id granted chara); * Sample in cross sectional right? year before treatment(aar);
set common.list; 

debt_relief_id+1;

chara= cats(koen, alder);

proc sort; by chara;
run;


proc sql;
select max( aar), min(aar) into :max_aar,:min_aar
from list;
quit;



%put &max_aar &min_aar;

* Conditional on granted?;

* From national sample only need pnr and matching variables;

data national_dis(keep=pnr_national alder aar koen chara);
set raw.idapall;
rename pnr=pnr_national;
chara= cats(koen, alder);
run;

proc sort data=national_dis; by pnr_national; run;

data doed(keep= pnr_national daar); * Is death a needed requirement, we only have obs to the year prior to dead?;
set raw.doede;
rename pnr=pnr_national;
run;

proc sort data=doed; by pnr_national; run;

data national_dis;
merge national_dis(in=a) doed(in=b);
by pnr_national;
if a=1;
proc sort; by chara;
run;

data list;
set list;
if chara>0; * Delete obs where we dont have charateristics, what we do with real data if the same is a problem?;
run;



%macro loop_years();
%do i=&min_aar %to &max_aar;

data national_dis_year;
set national_dis;
if aar=&i;
if daar>(&i+1) or daar=.; * Dead after treatment year;
run;

data list_year;
set list;
if aar=&i;
run;

proc sql; 
create table means as
select chara,
count(*) as Total
from list_year
group by chara;
quit;

data means;
set means;
obs+1;
run;

proc sql; 
select count(Total) into :count_obs
from means;
quit;



%put &count_obs; 

%loop_chara() 


 * Remove individuals who are already in matching sample;
* This part is slow;
proc sql;
create table national_dis as
select * from national_dis 
where pnr_national not in(select pnr_national from matches_final_year); quit; 


proc datasets nolist;
append base=matches_final data= matches_final_year force;
delete matches_final_year;
run;


quit;
%end;
%mend loop_years;


%macro loop_chara();
%do j=1 %to &count_obs;



data _null_;
set means;
if obs=&j then do;
call symput('chara_a',chara);
call symput('Total_a', Total);
end;
run;

data matches;
set national_dis_year;
where (chara="&chara_a");
run;

%let size=%eval(&Total_a*5);

proc surveyselect data=matches method=SRS sampsize=&size seed=101 out=matches_sampled; run;
* SRS should secure without replacement;
data temp;
set matches_sampled;
debt_relief_id=&i; * to merge back to pnr from debt relief sample;
run;

proc datasets nolist;
append base=matches_final_year data=temp force;
delete temp matches matches_sampled;
run;







quit;
%end;
%mend loop_chara;

%loop_years()


data common.matching_sample;
set matches_final; 
run;





proc datasets library=work nolist kill;

quit;
